Querying the DHS API in R

Call the DHS application programming interface (API) from R

Walkthrough

Load Packages

knitr::opts_chunk$set(echo = TRUE)
library(data.table)
library(jsonlite)
library(dplyr)
library(ggplot2)
library(DT)

Set up query

# Two-letter DHS country code(s). The full list of codes is at:
# https://dhsprogram.com/data/File-Types-and-Names.cfm#CP_JUMP_10136

countryIds <- c("BF") # quoted codes; separate with commas to request several

# Stratification level of the results: "national" or "subnational"

breakdown <- c("subnational")

# Indicator(s) to request. The full list of 3,785 DHS indicators is at:
# https://api.dhsprogram.com/rest/dhs/indicators?returnFields=IndicatorId,Label,Definition&f=html
#
# Example malaria indicators:
# - CH_FEVR_C_FEV = % of children U5 (or U3) with fever in the 2 weeks preceding the survey
# - ML_FEVT_C_ADV = % of children U5 with recent (<2wk) fever for whom advice/treatment was sought
# - ML_NETP_H_ITN = % of households with at least 1 ITN
# - ML_IRSM_H_IRS = % of households with IRS in the last 12 months
# - ML_PMAL_C_RDT = Malaria prevalence among children age 6-59 months tested by RDT
# - ML_PMAL_C_RDL = Lower limit of 95% CI for ML_PMAL_C_RDT
# - ML_PMAL_C_RDU = Upper limit of 95% CI for ML_PMAL_C_RDT
#
# Note: not every indicator is available for every country/district/survey

indicatorIds <- c(
  "ML_IRSM_H_IRS",
  "ML_NETP_H_ITN",
  "ML_FEVT_C_ADV",
  "ML_PMAL_C_RDT",
  "ML_PMAL_C_RDL",
  "ML_PMAL_C_RDU"
)

Execute query & plot results

#### Build API Query ####

# `base_url` is the indicator-data endpoint; `end_url` is the query-string
# suffix appended to every request.
base_url <- "http://api.dhsprogram.com/rest/dhs/data?f=json&surveyid=all"
end_url <- "&lang=en&f=json"

# Multi-valued parameters are sent as comma-separated lists.
url <- paste0(
  base_url,
  "&countryIds=", paste(countryIds, collapse = ","),
  "&breakdown=", breakdown,
  "&indicatorIds=", paste(indicatorIds, collapse = ","),
  end_url
)


#### Call API ####

# The API pages its results, so reading a single response can silently drop
# records. Fetch page 1, then request every further page reported in
# `TotalPages`, and stack all pages into one data frame.
first_page <- fromJSON(url) # get first page
total_pages <- first_page$TotalPages
if (is.null(total_pages) || is.na(total_pages)) {
  total_pages <- 1 # response carries no paging info: treat as a single page
}

later_pages <- lapply(
  seq_len(total_pages)[-1], # pages 2..TotalPages; empty when there is one page
  function(page) fromJSON(paste0(url, "&page=", page))$Data
)

# Base rbind() is used so that a column parsed with different types on
# different pages is coerced rather than rejected.
dhs_data <- do.call(rbind, c(list(first_page$Data), later_pages)) # save dataframe


#### Plot Results ###

# Trends in intervention indicators by country-region.
# The RDT prevalence estimate and its confidence bounds are excluded here.
# One panel is drawn per country-region, with one coloured line per indicator.
# NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` over `size` for lines;
# `size` is kept so the code also runs on older versions.
dhs_data %>%
  filter(!IndicatorId %in% c("ML_PMAL_C_RDT", "ML_PMAL_C_RDL", "ML_PMAL_C_RDU")) %>%
  ggplot(aes(x = SurveyYear, y = Value, color = Indicator)) +
  geom_line(size = 1) +
  geom_point(size = 2, shape = 21, fill = "white") +
  facet_wrap(~ paste(CountryName, CharacteristicLabel, sep = "-"), ncol = 5) +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    legend.justification = c(0, 0),
    axis.text.x = element_text(angle = 90, vjust = 0.5),
    panel.grid.minor.x = element_blank()
  ) +
  # One tick per calendar year over whatever range the data covers, instead
  # of a fixed 2000-2020 window that leaves later survey years unlabelled.
  scale_x_continuous(
    breaks = function(limits) seq(floor(limits[1]), ceiling(limits[2]), by = 1)
  ) +
  guides(color = guide_legend(direction = "vertical")) +
  labs(color = NULL)

# RDT prevalence trends by country-region (where available).
# Layers, in drawing order:
#   1. a vertical segment per survey year joining the lower bound, the
#      estimate and the upper bound (all three share the same x value);
#   2. a thicker trend line through the point estimates only;
#   3. an open point for every plotted value.
# No colour aesthetic is mapped, so this plot has no legend.
dhs_data %>%
  filter(IndicatorId %in% c("ML_PMAL_C_RDT", "ML_PMAL_C_RDL", "ML_PMAL_C_RDU")) %>%
  ggplot(aes(x = SurveyYear, y = Value)) +
  geom_line(aes(group = SurveyYear)) +
  geom_line(
    data = dhs_data[dhs_data$IndicatorId == "ML_PMAL_C_RDT", ],
    size = 1
  ) +
  geom_point(size = 2, shape = 21, fill = "white") +
  facet_wrap(~ paste(CountryName, CharacteristicLabel, sep = "-"), ncol = 5) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5),
    panel.grid.minor.x = element_blank()
  ) +
  # One tick per calendar year over whatever range the data covers, rather
  # than a hard-coded 2000-2025 window.
  scale_x_continuous(
    breaks = function(limits) seq(floor(limits[1]), ceiling(limits[2]), by = 1)
  ) +
  ylab("Malaria Prevalence by RDT (U5)")

Get list of available indicators

This section generates an interactive table of indicators at the subnational level for selected surveys/countries/indicators. As written, this code is useful for seeing which indicators were reported and when, for a given area.

This code is very slow: it sends a separate API request for every survey/indicator combination.
##########################################################
### View sample of available data for select countries ###
##########################################################


# 1. Get list of surveys from selected countries
################################################

countryIds <- c("BF") # selected country

### Build Query ###
# Surveys endpoint, restricted to the selected countries. `end_url` is the
# query-string suffix defined earlier in this document.
base_url <- "http://api.dhsprogram.com/rest/dhs/surveys?f=json"
country_filter <- paste(countryIds, collapse = ",")
url <- paste0(base_url, "&countryIds=", country_filter, end_url)

### Call API ###
# Keep only the `Data` element of the response as the survey table.
surveys <- fromJSON(url)$Data

# IDs of the surveys to query in the next step.
surveyIds <- surveys$SurveyId

# 2. Set list of indicators
###############################################

# Use every malaria indicator below, a subset of them, or any other list of
# indicator IDs (non-malaria indicators included).

malaria_indicators <- c(
  "ML_NETP_H_MOS", "ML_NETP_H_ITN", "ML_NETP_H_LLN", "ML_NETP_H_MNM",
  "ML_NETP_H_MNI", "ML_NETP_H_MNL", "ML_NETP_H_NUM", "ML_NETP_H_UNW",
  "ML_NETP_H_MS2", "ML_NETP_H_IT2", "ML_NETP_H_LL2", "ML_NETP_H_NM2",
  "ML_NETP_H_UN2",
  "ML_IRSM_H_IRS", "ML_IRSM_H_IIR", "ML_IRSM_H_I2I", "ML_IRSM_H_NUM",
  "ML_IRSM_H_UNW",
  "ML_ITNA_P_NUM", "ML_ITNA_P_UNW", "ML_ITNA_P_ACC",
  "ML_NETU_P_ANY", "ML_NETU_P_ITN", "ML_NETU_P_LLN", "ML_NETU_P_ITI",
  "ML_NETU_P_NUM", "ML_NETU_P_UNW", "ML_NETU_P_IT1", "ML_NETU_P_NM1",
  "ML_NETU_P_UN1",
  "ML_ITNU_N_ITN", "ML_ITNU_N_NUM", "ML_ITNU_N_UNW",
  "ML_NETC_C_ANY", "ML_NETC_C_ITN", "ML_NETC_C_LLN", "ML_NETC_C_ITI",
  "ML_NETC_C_NUM", "ML_NETC_C_UNW", "ML_NETC_C_IT1", "ML_NETC_C_NM1",
  "ML_NETC_C_UN1",
  "ML_NETW_W_ANY", "ML_NETW_W_ITN", "ML_NETW_W_LLN", "ML_NETW_W_ITI",
  "ML_NETW_W_NUM", "ML_NETW_W_UNW", "ML_NETW_W_IT1", "ML_NETW_W_NM1",
  "ML_NETW_W_UN1",
  "ML_IPTP_W_SPF", "ML_IPTP_W_2SP", "ML_IPTP_W_3SP", "ML_IPTP_W_SPA",
  "ML_IPTP_W_2SA", "ML_IPTP_W_3SA", "ML_IPTP_W_NUM", "ML_IPTP_W_UNW",
  "ML_FEVR_C_FEV", "ML_FEVR_C_NUM", "ML_FEVR_C_UNW",
  "ML_FEVT_C_ADV", "ML_FEVT_C_BLD", "ML_FEVT_C_ACT", "ML_FEVT_C_ACS",
  "ML_FEVT_C_AML", "ML_FEVT_C_AMS", "ML_FEVT_C_NUM", "ML_FEVT_C_UNW",
  "ML_AMLD_C_ACT", "ML_AMLD_C_QNN", "ML_AMLD_C_SPF", "ML_AMLD_C_CHL",
  "ML_AMLD_C_AMQ", "ML_AMLD_C_NUM", "ML_AMLD_C_OAM", "ML_AMLD_C_UNW",
  "ML_CMLT_C_ANM", "ML_CMLT_C_RDT", "ML_CMLT_C_MSY", "ML_CMLT_C_NUM",
  "ML_HEMO_C_HL8", "ML_HEMO_C_NUM", "ML_HEMO_C_UNW",
  "ML_PMAL_C_RDT", "ML_PMAL_C_RDE", "ML_PMAL_C_RDR", "ML_PMAL_C_RDL",
  "ML_PMAL_C_RDU", "ML_PMAL_C_NMR", "ML_PMAL_C_UNR", "ML_PMAL_C_UER",
  "ML_PMAL_C_MSY", "ML_PMAL_C_MSE", "ML_PMAL_C_MSR", "ML_PMAL_C_MSL",
  "ML_PMAL_C_MSU", "ML_PMAL_C_NMM", "ML_PMAL_C_UNM", "ML_PMAL_C_UEM",
  "ML_NSRC_N_MDC", "ML_NSRC_N_ANC", "ML_NSRC_N_IMM", "ML_NSRC_N_GHF",
  "ML_NSRC_N_PHF", "ML_NSRC_N_PHM", "ML_NSRC_N_SHP", "ML_NSRC_N_CHW",
  "ML_NSRC_N_REL", "ML_NSRC_N_SCL", "ML_NSRC_N_OTH", "ML_NSRC_N_DKM",
  "ML_NSRC_N_TOT", "ML_NSRC_N_NUM",
  "ML_FEVT_C_ADS", "ML_AMLD_C_ART"
)

# An empty string '' will query ALL indicators (probably very slow)
indicatorIds <- malaria_indicators

# Query every survey/indicator combination and collect the non-empty results.
# Reads `surveyIds`, `indicatorIds`, `breakdown` and `end_url`, which are set
# earlier in this document; writes the combined data frame to `indicators`.

# The endpoint is the same for every request, so it is set once up front.
base_url <- "http://api.dhsprogram.com/rest/dhs/data?f=json"

# Non-empty responses are gathered in a list and bound once after the loop,
# rather than growing a data frame with rbind() on every iteration.
indicator_chunks <- list()

for (survey in surveyIds) {
  for (indicator in indicatorIds) {
    ### Build Query ###
    url <- paste0(
      base_url,
      "&surveyIds=", survey,
      "&breakdown=", breakdown,
      "&indicatorIds=", indicator,
      end_url
    )

    ### Call API ###
    indicators_temp <- fromJSON(url) # get page

    # Keep the response only if it has data. Base length() is used because
    # is_empty() is not provided by any package this document loads.
    if (length(indicators_temp$Data) > 0) {
      indicator_chunks[[length(indicator_chunks) + 1]] <- indicators_temp$Data
    }
  }
}

# Fail with a clear message when nothing came back; otherwise `indicators`
# would be undefined (or stale) when the table below is built.
if (length(indicator_chunks) == 0) {
  stop(
    "The DHS API returned no data for the selected surveys and indicators.",
    call. = FALSE
  )
}

indicators <- do.call(rbind, indicator_chunks)

### Inspect Available Data ###

#summary(dhs_data)

# Keep the columns that identify each record, then show them as an
# interactive table with per-column filters and 20 rows per page.
indicator_view <- select(
  indicators,
  SurveyId, IndicatorId, Indicator, IsPreferred, Value,
  CharacteristicLabel, ByVariableLabel
)

datatable(
  indicator_view,
  filter = "top",
  options = list(pageLength = 20, dom = "ftlip")
)